Imports¶

In [28]:
import html
from pathlib import Path

import folium
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
from folium.plugins import MarkerCluster
from ipywidgets import interact
In [29]:
# Try this first
# Initialise plotly's offline notebook mode so that the plotly figures below can render inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm in the plotly docs.
import plotly
plotly.offline.init_notebook_mode(connected=True)
In [30]:
# If it does not work, try this too
# Fallback: wrap whatever get_plotlyjs() returns in a Javascript display object and
# make it the cell output, so the front end receives the plotly.js code directly.
from IPython.display import Javascript
from plotly.offline import get_plotlyjs
Javascript(get_plotlyjs())
Out[30]:
In [31]:
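# -o (over)writes the target file; the previous ">>" appended to it, which corrupts the parquet file when the download is repeated.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/parquet?lang=fr&timezone=Europe%2FBerlin" -o "../data/montees-par-arret-par-ligne.parquet"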
In [32]:
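# -o (over)writes the target file; the previous ">>" appended to it, which yields invalid JSON when the download is repeated.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/json?lang=fr&timezone=Europe%2FBerlin" -o "../data/montees-par-arret-par-ligne.json"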
In [33]:
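# -o (over)writes the target file; the previous ">>" appended to it, which yields invalid JSON when the download is repeated.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/arrets/exports/json?lang=fr&timezone=Europe%2FBerlin" -o "../data/arrets.json"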

Arrêts¶

In [34]:
# Load the stops ("arrets") JSON export; the commented-out curl cell above shows where the file comes from.
data_arrets = pd.read_json('../data/arrets.json')
# Preview the first rows to check the column layout.
data_arrets.head()
Out[34]:
arretcodelong nomarret commune pays codedidoc coordonnees actif
0 _BADNF Bardonnex Douane - F SAINT-JULIEN-EN-GENEVOIS FR NaN {'lon': 6.096618, 'lat': 46.142014} N
1 _CANDF Bois Candide-Dne - F FERNEY-VOLTAIRE FR NaN {'lon': 6.092343, 'lat': 46.243755} Y
2 _CZDNF Croix-de-Rozon-Dne - F COLLONGES-SOUS-SALÈVE FR NaN {'lon': 6.137984, 'lat': 46.143688} N
3 _DOSOF Soral-Dne - F VIRY FR NaN {'lon': 6.03604, 'lat': 46.136703} Y
4 _GSDNS Grand-Saconnex-Dne - CH LE GRAND-SACONNEX CH NaN {'lon': 6.120933, 'lat': 46.24839} Y
In [35]:
# transform coordinates
# Split the {'lon': ..., 'lat': ...} dicts into two numeric columns.
# The guard keeps the cell re-runnable: once 'coordonnees' has been dropped there is
# nothing left to do (a second execution used to raise KeyError).
if "coordonnees" in data_arrets.columns:
    for axis in ("lon", "lat"):
        data_arrets[f"coordonnees_{axis}"] = data_arrets["coordonnees"].apply(
            # Stops without coordinates hold a non-dict placeholder (None/NaN): keep them missing.
            lambda point, axis=axis: point.get(axis) if isinstance(point, dict) else None
        )
    data_arrets = data_arrets.drop(columns="coordonnees")

# replace actif by boolean
# Only map while the column still holds the raw "Y"/"N" flags: mapping an already
# boolean column again would turn every value into NaN (no matching dict key).
if not pd.api.types.is_bool_dtype(data_arrets["actif"]):
    data_arrets["actif"] = data_arrets["actif"].map({"Y": True, "N": False})
In [36]:
# Display the cleaned stops table (the notebook shows the first and last rows, eliding the middle).
data_arrets
Out[36]:
arretcodelong nomarret commune pays codedidoc actif coordonnees_lon coordonnees_lat
0 _BADNF Bardonnex Douane - F SAINT-JULIEN-EN-GENEVOIS FR NaN False 6.096618 46.142014
1 _CANDF Bois Candide-Dne - F FERNEY-VOLTAIRE FR NaN True 6.092343 46.243755
2 _CZDNF Croix-de-Rozon-Dne - F COLLONGES-SOUS-SALÈVE FR NaN False 6.137984 46.143688
3 _DOSOF Soral-Dne - F VIRY FR NaN True 6.036040 46.136703
4 _GSDNS Grand-Saconnex-Dne - CH LE GRAND-SACONNEX CH NaN True 6.120933 46.248390
... ... ... ... ... ... ... ... ...
4396 ZIPL03 ZIPLO PLAN-LES-OUATES CH 8593073.0 True 6.101779 46.165997
4397 ZIPL99 ZIPLO PLAN-LES-OUATES CH 8593073.0 False 6.103465 46.167000
4398 ZIPLO02 None None CH NaN False NaN NaN
4399 ZMON01 ZI de Montréal VILLE-LA-GRAND FR 8595870.0 False 6.277687 46.203590
4400 ZOLA00 Émile Zola ANNEMASSE FR 8595778.0 False 6.231863 46.196251

4401 rows × 8 columns

In [37]:
# Fill missing values with np.nan, in place.
# NOTE(review): presumably meant to normalise the None placeholders visible in the object
# columns to NaN — confirm it has that effect with the installed pandas version.
data_arrets.fillna(value=np.nan, inplace=True)
In [38]:
# Column dtypes and non-null counts after the clean-up above.
data_arrets.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4401 entries, 0 to 4400
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   arretcodelong    4401 non-null   object 
 1   nomarret         4382 non-null   object 
 2   commune          4326 non-null   object 
 3   pays             4401 non-null   object 
 4   codedidoc        3746 non-null   float64
 5   actif            4401 non-null   bool   
 6   coordonnees_lon  4253 non-null   float64
 7   coordonnees_lat  4253 non-null   float64
dtypes: bool(1), float64(3), object(4)
memory usage: 245.1+ KB
In [39]:
# Number of missing values per column.
data_arrets.isnull().sum()
Out[39]:
arretcodelong        0
nomarret            19
commune             75
pays                 0
codedidoc          655
actif                0
coordonnees_lon    148
coordonnees_lat    148
dtype: int64
In [40]:
# Count rows that are exact duplicates of an earlier row.
# NOTE: this compares whole rows only; it does not show that 'arretcodelong' is unique,
# which the left merges on that key further down rely on to avoid multiplying rows.
data_arrets.duplicated().sum()
Out[40]:
0

Cartographie des arrêts¶

In [41]:
# Keep only the stops whose 'actif' flag is True.
data_arrets_actif = data_arrets.loc[data_arrets["actif"].eq(True)]

def draw_map_tpg(coordonnees_centre, zoom_start=12, arrets=None):
    """Build a folium map with one clustered marker per stop.

    Parameters
    ----------
    coordonnees_centre : sequence of two values
        ``[lat, lon]`` passed to ``folium.Map`` as the initial centre.
    zoom_start : int, default 12
        Initial zoom level of the map.
    arrets : pandas.DataFrame, optional
        Stops to draw. The columns ``nomarret``, ``commune``, ``pays``,
        ``arretcodelong``, ``coordonnees_lon`` and ``coordonnees_lat`` are read.
        When omitted, the notebook-level ``data_arrets_actif`` frame is used,
        which is the only source the function supported before.

    Returns
    -------
    folium.Map
        The map, with the markers attached to a ``MarkerCluster`` layer.
        Stops located in 'CH' get a red icon, all others a blue one.
    """
    if arrets is None:
        arrets = data_arrets_actif

    m = folium.Map(location=coordonnees_centre, zoom_start=zoom_start, min_zoom=9, control_scale=True)
    marker_cluster = MarkerCluster(name='Arrêts TPG').add_to(m)
    # folium.LayerControl().add_to(m)

    # Stops without a longitude or latitude cannot be placed on the map.
    arrets_localises = arrets.dropna(subset=['coordonnees_lon', 'coordonnees_lat'])

    # itertuples reads each row once; the former .iloc[i][...] lookups rebuilt the
    # whole row for every single field access.
    for arret in arrets_localises.itertuples(index=False):
        # The popup string is handed to folium as HTML, so the data values are escaped
        # to keep characters such as '<' or '&' in a stop name from breaking the markup.
        popup_html = "".join(
            "<b>{} :</b> {}<br>".format(label, html.escape(str(value)))
            for label, value in (
                ("Nom", arret.nomarret),
                ("Commune", arret.commune),
                ("Pays", arret.pays),
                ("Code Arret", arret.arretcodelong),
            )
        )

        marker_color = 'red' if arret.pays == 'CH' else 'blue'

        folium.Marker(
            location=[arret.coordonnees_lat, arret.coordonnees_lon],
            tooltip=arret.nomarret,
            icon=folium.Icon(color=marker_color, icon="bus", prefix="fa"),
            popup=folium.Popup(popup_html, max_width=300)
        ).add_to(marker_cluster)

    return m
In [42]:
# Centre the overview map on the mean latitude / longitude of the active stops.
coordonnees_centre = [
    data_arrets_actif[axe].mean()
    for axe in ("coordonnees_lat", "coordonnees_lon")
]
m = draw_map_tpg(coordonnees_centre=coordonnees_centre)
m
Out[42]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [43]:
# Export the map as a standalone HTML page.
outfp = "../outputs/map_arrets.html"
# Create the output folder if needed so the save does not fail on a fresh checkout.
Path(outfp).parent.mkdir(parents=True, exist_ok=True)
m.save(outfp)

Cartographie des arrêts centrée sur adresse utilisateur¶

In [44]:
def geocode(address, timeout=10, user_agent="tpg-arrets-notebook"):
    """Look up an address with the Nominatim search endpoint and return the first match.

    Parameters
    ----------
    address : str
        Free-form query, sent as the ``q`` parameter.
    timeout : float, default 10
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook.
    user_agent : str
        Value of the ``User-Agent`` request header. Nominatim's usage policy asks
        clients to identify themselves rather than send an HTTP library's default.

    Returns
    -------
    dict or None
        The first entry of the JSON result list (its ``display_name`` is printed),
        or ``None`` when the request fails, the status is not 200, or nothing matches.
    """
    # Only the first result is used, so ask the server for a single one.
    params = {"q": address, "format": "json", "limit": 1}
    headers = {"User-Agent": user_agent}

    try:
        response = requests.get(
            "https://nominatim.openstreetmap.org/search",
            params=params,
            headers=headers,
            timeout=timeout,
        )
        places = response.json() if response.status_code == 200 else []
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout or a body that is not valid JSON: report it and
        # fall through to the "not found" path instead of raising.
        print(f"Erreur lors du géocodage : {exc}")
        places = []

    # Guard against a non-list payload before indexing into it.
    if isinstance(places, list) and places:
        place = places[0]
        print(f"Adresse trouvée: {place['display_name']}")
        return place

    print("Pas d'adresse trouvée")
    return None
In [45]:
# Ask for an address and centre the stops map on it.
address = input("Entrez l'adresse choisie > ")
place = geocode(address)
if place is None:
    # Unknown address: retry with the city itself.
    place = geocode('Genève')

if place is None:
    # Both lookups failed (e.g. no network). Fall back to the fixed Geneva centre that
    # the heatmap cell also uses, instead of failing on place['lat'] below.
    place = {'lat': 46.2044, 'lon': 6.1432, 'name': 'Genève', 'display_name': 'Genève'}

# float() accepts the numeric fallback above as well as coordinates delivered as text.
coordonnees_centre = [float(place['lat']), float(place['lon'])]

m = draw_map_tpg(coordonnees_centre, zoom_start=16)
folium.Marker(
    location=coordonnees_centre,
    # Prefer the short name; use the full display name when it is empty or absent.
    tooltip=place.get('name') or place['display_name'],
    popup=folium.Popup(place['display_name'], max_width=300)
).add_to(m)
m
Pas d'adresse trouvée
Adresse trouvée: Genève, Schweiz/Suisse/Svizzera/Svizra
Out[45]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Montées par arrêt¶

In [46]:
# Load the boardings-per-stop-per-line parquet export (see the commented-out curl cell near the top).
data_montees = pd.read_parquet('../data/montees-par-arret-par-ligne.parquet')
# Preview the first rows to check the column layout.
data_montees.head()
Out[46]:
date ligne ligne_type_act jour_semaine horaire_type arret arret_code_long indice_semaine indice_jour_semaine nb_de_montees nb_de_descentes mois_annee coordonnees
0 2023-03-20 55 SECONDAIRE 1-Lundi NORMAL Vireloup VRLP01 12 1 9.63 0.00 2023-03 b'\x01\x01\x00\x00\x00\xfbw}\xe6\xac\x7f\x18@w...
1 2023-03-20 56 SECONDAIRE 1-Lundi NORMAL Bergère BRGE01 12 1 16.37 1.00 2023-03 b'\x01\x01\x00\x00\x00dw\x81\x92\x02;\x18@\xc0...
2 2023-03-20 56 SECONDAIRE 1-Lundi NORMAL Blandonnet BLDO01 12 1 64.38 12.81 2023-03 b'\x01\x01\x00\x00\x00\x0b\x99+\x83jc\x18@/\x8...
3 2023-03-20 56 SECONDAIRE 1-Lundi NORMAL Hôpital de La Tour HTOU04 12 1 23.16 2.07 2023-03 b'\x01\x01\x00\x00\x00\xa8\xc8!\xe2\xe6D\x18@\...
4 2023-03-20 56 SECONDAIRE 1-Lundi NORMAL ICC ICC00 12 1 0.00 2.96 2023-03 b'\x01\x01\x00\x00\x00\xe7p\xad\xf6\xb0g\x18@\...
In [47]:
# Column dtypes and memory footprint of the boardings table.
data_montees.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3847795 entries, 0 to 3847794
Data columns (total 13 columns):
 #   Column               Dtype  
---  ------               -----  
 0   date                 object 
 1   ligne                object 
 2   ligne_type_act       object 
 3   jour_semaine         object 
 4   horaire_type         object 
 5   arret                object 
 6   arret_code_long      object 
 7   indice_semaine       int64  
 8   indice_jour_semaine  int64  
 9   nb_de_montees        float64
 10  nb_de_descentes      float64
 11  mois_annee           object 
 12  coordonnees          object 
dtypes: float64(2), int64(2), object(9)
memory usage: 381.6+ MB
In [48]:
# Number of missing values per column.
data_montees.isnull().sum()
Out[48]:
date                    0
ligne                   0
ligne_type_act          0
jour_semaine            0
horaire_type            0
arret                   0
arret_code_long         0
indice_semaine          0
indice_jour_semaine     0
nb_de_montees           0
nb_de_descentes         0
mois_annee              0
coordonnees            29
dtype: int64
In [49]:
# Count rows that are exact duplicates of an earlier row.
data_montees.duplicated().sum()
Out[49]:
0
In [50]:
# 'date' is loaded with object dtype (see the info() output above); parse it into datetimes
# so that the min/max and date-range comparisons in the widget cells below operate on dates.
data_montees['date'] = pd.to_datetime(data_montees['date'])
In [51]:
# Set default start and end dates
default_start_date = data_montees['date'].min()
default_end_date = data_montees['date'].max()

# Create date range picker widgets with default values
# NOTE(review): a later cell reuses the name update_chart for the heatmap helper;
# consider renaming one of the two to avoid the silent shadowing.
@interact(start_date=widgets.DatePicker(value=default_start_date), end_date=widgets.DatePicker(value=default_end_date))
def update_chart(start_date, end_date):
    """Draw the total boardings per stop as a bar chart for the selected date range.

    Parameters
    ----------
    start_date, end_date : date-like or None
        Inclusive bounds applied to ``data_montees['date']``. A missing bound
        (None/NaT, e.g. an emptied picker) falls back to the first / last date
        available in the data.
    """
    start_date = data_montees['date'].min() if pd.isna(start_date) else pd.to_datetime(start_date)
    end_date = data_montees['date'].max() if pd.isna(end_date) else pd.to_datetime(end_date)

    in_range = (data_montees['date'] >= start_date) & (data_montees['date'] <= end_date)
    total_montees_par_arret_filtered = (
        data_montees[in_range]
        .groupby(by='arret_code_long')[['nb_de_montees', 'nb_de_descentes']].sum()
        .sort_values(by='nb_de_montees', ascending=False)
        .reset_index()
        # Left join: stop codes unknown to data_arrets are kept, with missing stop attributes.
        .merge(right=data_arrets, how='left', left_on='arret_code_long', right_on='arretcodelong')
    )

    # hover_data is given as a list, the form accepted by every plotly express version.
    fig = px.bar(total_montees_par_arret_filtered, x="nomarret", y="nb_de_montees", hover_data=["arret_code_long"],
                 width=1200, height=600, orientation='v',
                 title='Total Montees par Arret', labels={'arret_code_long': 'Arret Code Long', 'nomarret':'Nom arrêt', 'nb_de_montees': 'Total Montees'})
    fig.show()
interactive(children=(DatePicker(value=Timestamp('2021-03-01 00:00:00'), description='start_date'), DatePicker…
In [52]:
# Define a function to update the chart (heatmap)
def update_chart(start_date, end_date):
    """Draw a density heatmap of total boardings per stop for the selected date range.

    Parameters
    ----------
    start_date, end_date : date-like or None
        Inclusive bounds applied to ``data_montees['date']``. A missing bound
        (None/NaT) falls back to the first / last date available in the data;
        previously a missing date made the title formatting fail.
    """
    start_date = data_montees['date'].min() if pd.isna(start_date) else pd.to_datetime(start_date)
    end_date = data_montees['date'].max() if pd.isna(end_date) else pd.to_datetime(end_date)

    filtered_data = data_montees[(data_montees['date'] >= start_date) &
                                 (data_montees['date'] <= end_date)]
    total_montees_par_arret_filtered = (
        filtered_data.groupby(by='arret_code_long')[['nb_de_montees']].sum()
        .sort_values(by='nb_de_montees', ascending=False)
        .reset_index()
        .merge(right=data_arrets, how='left', left_on='arret_code_long', right_on='arretcodelong')
        # Stops without coordinates (unmatched codes or missing positions) cannot be placed on the map.
        .dropna(subset=['coordonnees_lat', 'coordonnees_lon'])
    )

    # hover_data is given as a list, the form accepted by every plotly express version.
    fig = px.density_mapbox(total_montees_par_arret_filtered, lat='coordonnees_lat', lon='coordonnees_lon', z='nb_de_montees',
                            hover_name='arret_code_long', hover_data=['nomarret'],
                            radius=10, center=dict(lat=46.2044, lon=6.1432), zoom=11,
                            mapbox_style="open-street-map",
                            # mapbox_style="carto-positron",
                            width=900, height=600,
                            title=f"Total Montees par Arret (Heatmap) (du {start_date.strftime('%d/%m/%Y')} au {end_date.strftime('%d/%m/%Y')})",
                            labels={'nb_de_montees': 'Total Montees', 'nomarret': 'Arrêt'})
    fig.show()

# Set default start and end dates
default_start_date = data_montees['date'].min()
default_end_date = data_montees['date'].max()

# Create date range picker widgets with default values
@interact(start_date=widgets.DatePicker(value=default_start_date), end_date=widgets.DatePicker(value=default_end_date))
def draw_map_and_heatmap(start_date, end_date):
    """Redraw the boardings heatmap whenever one of the two date pickers changes.

    NOTE(review): despite the name, only the heatmap is drawn here; no stops map is built.

    Parameters
    ----------
    start_date, end_date : date-like or None
        Values of the two pickers, forwarded unchanged to ``update_chart``.
    """
    # update_chart converts the dates itself, so they are passed through as received.
    # The unused per-call filter of active stops was removed.
    update_chart(start_date, end_date)
interactive(children=(DatePicker(value=Timestamp('2021-03-01 00:00:00'), description='start_date'), DatePicker…